1) Libraries, extraction, cleaning and separating:

# Load the raw records: one row per State / Year / Type_code / Type / Gender /
# Age_group combination, with the count in `Total`.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned. `stringsAsFactors = FALSE` keeps State and Type as character
# vectors, so the string replacements done during cleaning also work on
# R versions older than 4.0 (where the default was to create factors).
df <- read.table(
  'C:/Personal/Aravinda Stuff/Sem 5th/DV and FDA proj/Suicides in India 2001-2012.csv',
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
head(df)
##           State Year Type_code                                    Type Gender
## 1 A & N Islands 2001    Causes                      Illness (Aids/STD) Female
## 2 A & N Islands 2001    Causes Bankruptcy or Sudden change in Economic Female
## 3 A & N Islands 2001    Causes Cancellation/Non-Settlement of Marriage Female
## 4 A & N Islands 2001    Causes       Physical Abuse (Rape/Incest Etc.) Female
## 5 A & N Islands 2001    Causes                           Dowry Dispute Female
## 6 A & N Islands 2001    Causes                         Family Problems Female
##   Age_group Total
## 1      0-14     0
## 2      0-14     0
## 3      0-14     0
## 4      0-14     0
## 5      0-14     0
## 6      0-14     0
#install.packages('tidyverse')
#install.packages('caret')
#install.packages("plotly")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(dplyr)
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(treemapify)
library(stringr)  
library(ggthemes)

# Data cleaning
# Append "-Ut" to the State value of each union territory listed below.
union_territories <- c(
  "A & N Islands",
  "Chandigarh",
  "D & N Haveli",
  "Daman & Diu",
  "Lakshadweep",
  "Delhi"
)
is_ut <- df$State %in% union_territories
df$State[is_ut] <- paste0(df$State[is_ut], "-Ut")
head(df)
##              State Year Type_code                                    Type
## 1 A & N Islands-Ut 2001    Causes                      Illness (Aids/STD)
## 2 A & N Islands-Ut 2001    Causes Bankruptcy or Sudden change in Economic
## 3 A & N Islands-Ut 2001    Causes Cancellation/Non-Settlement of Marriage
## 4 A & N Islands-Ut 2001    Causes       Physical Abuse (Rape/Incest Etc.)
## 5 A & N Islands-Ut 2001    Causes                           Dowry Dispute
## 6 A & N Islands-Ut 2001    Causes                         Family Problems
##   Gender Age_group Total
## 1 Female      0-14     0
## 2 Female      0-14     0
## 3 Female      0-14     0
## 4 Female      0-14     0
## 5 Female      0-14     0
## 6 Female      0-14     0
# Renaming causes
# Lookup of raw Type label -> cleaned label. Two pairs of raw spellings map to
# the same cleaned label. No cleaned label is itself a raw label, so applying
# the whole lookup at once gives the same result as one replacement at a time.
type_renames <- c(
  "Bankruptcy or Sudden change in Economic" = "Sudden change in Economic Status or Bankruptcy",
  "By Other means (please specify)" = "By Other means",
  "Not having Children(Barrenness/Impotency" = "Not having Children(Impotency)",
  "By Jumping from (Building)" = "By Jumping from Building",
  "Hr. Secondary/Intermediate/Pre-Universit" = "Hr. Secondary/Intermediate/Pre-University",
  "Failure in Examination" = "Examination Failure",
  "By coming under running vehicles/trains" = "By road or railway accidents",
  "Bankruptcy or Sudden change in Economic Status" = "Sudden change in Economic Status or Bankruptcy",
  "Not having Children (Barrenness/Impotency" = "Not having Children(Impotency)"
)
needs_rename <- df$Type %in% names(type_renames)
df$Type[needs_rename] <- unname(type_renames[df$Type[needs_rename]])
#causescount
head(df)
##              State Year Type_code
## 1 A & N Islands-Ut 2001    Causes
## 2 A & N Islands-Ut 2001    Causes
## 3 A & N Islands-Ut 2001    Causes
## 4 A & N Islands-Ut 2001    Causes
## 5 A & N Islands-Ut 2001    Causes
## 6 A & N Islands-Ut 2001    Causes
##                                             Type Gender Age_group Total
## 1                             Illness (Aids/STD) Female      0-14     0
## 2 Sudden change in Economic Status or Bankruptcy Female      0-14     0
## 3        Cancellation/Non-Settlement of Marriage Female      0-14     0
## 4              Physical Abuse (Rape/Incest Etc.) Female      0-14     0
## 5                                  Dowry Dispute Female      0-14     0
## 6                                Family Problems Female      0-14     0
# Row filtering. `%in%` and an explicit NA guard are used instead of bare `==`
# masks: a logical index containing NA makes `[` return an all-NA row, so a
# single missing State/Total/Type value would otherwise inject junk rows.

# Drop the aggregate pseudo-states.
summary_rows <- c("Total (Uts)", "Total (All India)", "Total (States)")
df1 <- df[!(df$State %in% summary_rows), ]

# Drop rows whose Total is zero (rows with a missing Total are dropped too).
df2 <- df1[!is.na(df1$Total) & df1$Total != 0, ]

# Drop the unwanted Types. Labels are matched verbatim, including the
# "Specity" spelling that the filter has always used.
unwanted_types <- c(
  "By Other means",
  "Others (Please Specify)",
  "Causes Not known",
  "Other Causes (Please Specity)"
)
df2 <- df2[!(df2$Type %in% unwanted_types), ]

# Split the cleaned data into one data frame per value of the "Type_code" column.
causesdf <- df2 %>% filter(Type_code == "Causes")
edudf <- df2 %>% filter(Type_code == "Education_Status")
meansdf <- df2 %>% filter(Type_code == "Means_adopted")
professionaldf <- df2 %>% filter(Type_code == "Professional_Profile")
socialdf <- df2 %>% filter(Type_code == "Social_Status")

1. Comparison of Suicide attempts with respect to Male or Female

library(plotrix)
# Share of the summed Total per Gender, drawn as a 3-D pie chart.
gper <- df2 %>%
  select(Gender, Total) %>%
  group_by(Gender) %>%
  summarise(total_all = sum(Total)) %>%
  mutate(
    rs = sum(total_all),
    percent = round((total_all / rs) * 100)
  )
#gper
# One "<Gender> <percent> %" label per row of gper. Built vectorised rather
# than by indexing rows 1 and 2, so the labels always line up with the slices
# regardless of how many Gender values the data contains.
label <- paste(gper$Gender, gper$percent, "%", sep = " ")
colr <- c("palevioletred1", "dodgerblue2")

pie3D(gper$percent, labels = label, labelcex = 1.1, explode = 0.4, col = colr)

2. Suicide Count based On Age Groups

# Summed Total per Gender and Age_group, leaving out the "0-100" and "0-100+"
# age groups, shown as an interactive bar chart coloured by Gender.
bardf <- df2 %>%
  select(Gender, Age_group, Total) %>%
  filter(Age_group != "0-100", Age_group != "0-100+") %>%
  group_by(Gender, Age_group) %>%
  summarise(Total = sum(Total))
## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
bardf <- as.data.frame(bardf)
#head(bardf)

fig <- bardf %>%
  plot_ly(x = ~Age_group, y = ~Total, type = "bar", color = ~Gender)
fig
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

3. States in India VS Suicide Counts

# Summed Total per State and Year, shown first as a static bar chart and then
# as an interactive scatter plot with marker size mapped to Total.
treedf <- df2 %>%
  select(State, Year, Total) %>%
  group_by(State, Year) %>%
  summarise(Total = sum(Total))
## `summarise()` has grouped output by 'State'. You can override using the `.groups` argument.
treedf <- as.data.frame(treedf)
#head(treedf)
ggplot(treedf, aes(x = State, y = Total, fill = State)) +
  geom_col() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90)
  )

fig <- treedf %>%
  plot_ly(
    x = ~State,
    y = ~Total,
    type = "scatter",
    size = ~Total,
    color = ~State
  )
fig
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

4. Top 10 States In India

# Ten states with the largest summed Total, shown as a treemap and as an
# interactive scatter plot.
aggregate_rows <- c("Total (All India)", "Total (States)", "Total (Uts)")
topstate <- df2 %>%
  filter(!State %in% aggregate_rows) %>%
  select(State, Year, Total) %>%
  group_by(State) %>%
  summarise(Total = sum(Total)) %>%
  arrange(desc(Total)) %>%
  head(10)
topstate <- as.data.frame(topstate)
#topstate

ggplot(topstate, aes(area = Total, fill = State, label = State)) +
  geom_treemap() +
  geom_treemap_text(fontface = "italic", place = "centre", grow = TRUE) +
  labs(title = "Top 10 States with Higher Rates")

fig <- topstate %>%
  plot_ly(
    x = ~State,
    y = ~Total,
    type = "scatter",
    size = ~Total,
    color = ~State
  )
fig
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

5. Bottom 10 States In India

# Ten states with the smallest summed Total, drawn as a lollipop chart.
# Built from df2 (the cleaned data) rather than the raw df, so the totals are
# computed on the same basis as the top-10 chart: the unwanted "other/unknown"
# Types are excluded in both.
bottomstate <- df2 %>%
  filter(!State %in% c("Total (All India)", "Total (States)", "Total (Uts)")) %>%
  select(State, Year, Total) %>%
  group_by(State) %>%
  summarise(Total = sum(Total)) %>%
  arrange(Total) %>%
  head(10)
bottomstate <- as.data.frame(bottomstate)
#bottomstate

# factor(..., levels = State) fixes the x-axis order to the ascending-Total
# order produced by arrange(); `levels` is spelled out in full instead of
# relying on partial argument matching.
ggplot(bottomstate, aes(x = factor(State, levels = State), y = Total, color = State)) +
  geom_point(size = 4) +
  geom_segment(aes(xend = State, y = 0, yend = Total), size = 2) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90)) +
  labs(title = "Bottom 10 States")

6. Top 10 Type Of Suicide Methods

# Ten Type values with the largest summed Total, shown as a pie chart.
# NOTE(review): this ranks every Type in df2, with no filter on Type_code, so
# it is not restricted to Type_code == "Means_adopted" -- confirm that this is
# what the "suicide methods" heading intends.
typedf <- df2 %>%
  select(Type, Total) %>%
  group_by(Type) %>%
  summarise(Total = sum(Total))
typedf <- as.data.frame(typedf) %>%
  arrange(desc(Total)) %>%
  head(10)
#typedf

# The marker list previously also passed `colors = colors`, but no `colors`
# palette is defined in this script, so the name resolved to the
# grDevices::colors function instead of a vector of colours. It is omitted
# here and plotly's default slice colours are used.
figpie <- plot_ly(typedf, labels = ~Type, values = ~Total, type = 'pie',
               textposition = 'inside',
               textinfo = 'label+percent',
               insidetextfont = list(color = '#FFFFFF'),
               hoverinfo = 'text',
               text = ~paste(Total),
               marker = list(line = list(color = '#FFFFFF', width = 1)),
               showlegend = FALSE)
figpie

7. Relation between Education level and suicide

# Palette shared by the plots below.
colr <- c(
  "palevioletred1", "dodgerblue4", "goldenrod3", "orangered4",
  "lightsalmon4", "mistyrose4", "mediumpurple3", "slateblue4",
  "slateblue", "slategray4", "tan"
)
# Summed Total per Gender and education level (Type).
sc_type <- df2 %>%
  filter(Type_code == "Education_Status") %>%
  select(Gender, Total, Type) %>%
  group_by(Gender, Type) %>%
  summarise(ttotal = sum(Total))
## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
# Box plot per education level; x labels are cut to the first 15 characters.
sc_type %>%
  ggplot(aes(x = str_sub(Type, 1, 15), y = ttotal, fill = Type)) +
  geom_boxplot() +
  scale_fill_manual(values = colr) +
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 90)) +
  labs(x = "Education Level", y = "count")

# Proportion of each gender within every education level.
sc_type %>%
  ggplot(aes(x = str_sub(Type, 1, 15), y = ttotal, fill = Gender)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = colr) +
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 90)) +
  labs(x = "Education Level", y = "count")

8. Causes vs Count wrt Year

# Yearly summed Total for a selected set of causes, one line per cause.
# The exam-related cause is listed as "Examination Failure": the cleaning step
# renames "Failure in Examination" to that label, so filtering on the old
# label never matched any row and the cause was silently missing from the plot.
selected_causes <- c(
  "Examination Failure", "Family Problems", "Other Prolonged Illness",
  "Unemployment", "Dowry Dispute", "Poverty", "Insanity/Mental Illness"
)
df2 %>%
  filter(Type_code == "Causes" & Type %in% selected_causes) %>%
  select(Year, Total, Type) %>%
  group_by(Year, Type) %>%
  summarise(ytot = sum(Total)) %>%
  ggplot(aes(x = factor(Year), y = ytot, color = Type, group = Type)) +
  geom_line(size = 1) +
  scale_color_manual(values = colr) +
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 65, vjust = 0.5)) +
  labs(x = "Year", y = "Count") +
  geom_point(size = 2)
## `summarise()` has grouped output by 'Year'. You can override using the `.groups` argument.

9. Age vs Count wrt Year

# Year becomes a factor in df2 from here on (this also affects later uses of df2).
df2$Year <- as.factor(df2$Year)
# Summed Total per Year and Age_group, leaving out the "0-100" and "0-100+"
# age groups; a single filter is enough because repeating the same condition
# after group_by() removes nothing further.
agecntdf <- df2 %>%
  filter(Age_group != "0-100", Age_group != "0-100+") %>%
  select(Year, Total, Age_group) %>%
  group_by(Year, Age_group) %>%
  summarise(total = sum(Total))
## `summarise()` has grouped output by 'Year'. You can override using the `.groups` argument.
ggplot(agecntdf, aes(x = Year, y = total, group = Age_group, fill = Age_group)) +
  geom_area() +
  scale_fill_manual(values = colr)

10. Suicide Trend Over the years

# Summed Total per Year as an interactive bar chart, one colour per year.
temp <- df2 %>%
  group_by(Year) %>%
  summarise(total_case = sum(Total))
#temp
fig <- plot_ly(
  x = temp$Year,
  y = temp$total_case,
  type = "bar",
  color = temp$Year
) %>%
  layout(
    title = "Suicide Trend Over the years",
    barmode = 'group',
    xaxis = list(title = "Years"),
    yaxis = list(title = "Count")
  )

fig
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

11. Education_Status’s Type vs Suicide Count

# Summed Total per education level (Type), sorted ascending by count, shown as
# an interactive scatter plot with marker size mapped to the count.
temp <- edudf %>%
  group_by(Type) %>%
  summarise(total_case = sum(Total)) %>%
  arrange(total_case)
#temp
fig <- plot_ly(
  x = temp$Type,
  y = temp$total_case,
  type = 'scatter',
  size = temp$total_case,
  color = temp$Type
) %>%
  layout(
    title = "Level of education and suicide count",
    xaxis = list(title = 'Level of Education'),
    yaxis = list(title = 'Count')
  )

fig
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

12. Type_Code vs Total Count

# Summed Total per Type_code, printed and then drawn as a bar chart.
type_codedf <- df2 %>%
  select(Total, Year, Type_code) %>%
  group_by(Type_code) %>%
  summarise(Total = sum(Total)) %>%
  as.data.frame()
head(type_codedf)
##              Type_code   Total
## 1               Causes  987855
## 2     Education_Status 1455931
## 3        Means_adopted 1311561
## 4 Professional_Profile  970096
## 5        Social_Status 1455931
ggplot(type_codedf, aes(x = Type_code, y = Total, fill = Type_code)) +
  geom_col() +
  theme(legend.position = "bottom")

13. Social Status of suicide Victims

# Palette for the plots below.
colr <- c(
  "palevioletred1", "goldenrod3", "orangered4", "mistyrose4", "mediumpurple3",
  "slateblue4", "slateblue", "slategray4", "tan"
)
# Summed Total per Gender and social status (Type).
ss <- df2 %>%
  filter(Type_code == "Social_Status") %>%
  select(Gender, Total, Type) %>%
  group_by(Gender, Type) %>%
  summarise(ttotal = sum(Total))
## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
# Box plot per social status; x labels are cut to the first 15 characters.
ss %>%
  ggplot(aes(x = str_sub(Type, 1, 15), y = ttotal, fill = Type)) +
  geom_boxplot() +
  scale_fill_manual(values = colr) +
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 90)) +
  labs(x = "Social Status", y = "Total Count")

# Proportion of each gender within every social status.
ss %>%
  ggplot(aes(x = str_sub(Type, 1, 15), y = ttotal, fill = Gender)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = colr) +
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 90)) +
  labs(x = "Social Status", y = "Total Count")

14. Professional Profile of suicide Victims

# Palette for the plot below.
colr <- c(
  "palevioletred1", "goldenrod3", "orangered4", "mistyrose4", "mediumpurple3",
  "slateblue4", "slateblue", "slategray4", "tan"
)
# Summed Total per Gender and professional profile (Type).
ss <- df2 %>%
  filter(Type_code == "Professional_Profile") %>%
  select(Gender, Total, Type) %>%
  group_by(Gender, Type) %>%
  summarise(ttotal = sum(Total))
## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
# Proportion of each gender within every profile; x labels are cut to the
# first 15 characters.
ss %>%
  ggplot(aes(x = str_sub(Type, 1, 15), y = ttotal, fill = Gender)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = colr) +
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 90)) +
  labs(x = "Professional_Profile", y = "Total Count")

15. Ratio of Means Adopted by Suicide Victims w.r.t. Gender

# Palette for the plot below.
colr <- c(
  "palevioletred1", "goldenrod3", "orangered4", "mistyrose4", "mediumpurple3",
  "slateblue4", "slateblue", "slategray4", "tan"
)
# Summed Total per Gender and means adopted (Type).
ss <- df2 %>%
  filter(Type_code == "Means_adopted") %>%
  select(Gender, Total, Type) %>%
  group_by(Gender, Type) %>%
  summarise(ttotal = sum(Total))
## `summarise()` has grouped output by 'Gender'. You can override using the `.groups` argument.
# Proportion of each gender within every means; x labels are cut to the first
# 15 characters.
ss %>%
  ggplot(aes(x = str_sub(Type, 1, 15), y = ttotal, fill = Gender)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = colr) +
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 90)) +
  labs(x = "Means_adopted", y = "Total Count")

16. Means adopted stacked w.r.t Age Group

# Summed Total per means adopted (Type), Gender and Age_group.
ma_type <- df2 %>%
  filter(Type_code == "Means_adopted") %>%
  group_by(Type, Gender, Age_group) %>%
  summarize(mtot = sum(Total))
## `summarise()` has grouped output by 'Type', 'Gender'. You can override using the `.groups` argument.
# Stacked bars: one bar per means, segments coloured by age group.
ma_type %>%
  ggplot(aes(x = Type, y = mtot, fill = Age_group)) +
  geom_col(position = "stack") +
  scale_fill_manual(values = colr) +
  theme(legend.position = "bottom", axis.text.x = element_text(angle = 90)) +
  labs(x = "Means Adopted", y = "Count")